{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "fd253f4f-4cfc-4f87-9b6d-e52a297f0875",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   province        gdp    rise\n",
      "0        省市     数值(亿元)  增长率(%)\n",
      "1        广东  135673.16     4.8\n",
      "2        江苏   128222.2     5.8\n",
      "3        山东      92069       6\n",
      "4        浙江      82553       6\n",
      "5        四川    60132.9       6\n",
      "6        河南   59132.39     4.1\n",
      "7        湖北   55803.63       6\n",
      "8        福建    54355.1     4.5\n",
      "9        湖南   50012.85     4.6\n",
      "10       上海   47218.66       5\n",
      "11       安徽    47050.6     5.8\n",
      "12       河北    43944.1     5.5\n",
      "13       北京    43760.7     5.2\n",
      "14       陕西   33786.07     4.3\n",
      "15       江西    32200.1     4.1\n",
      "16       辽宁    30209.4     5.3\n",
      "17       重庆   30145.79     6.1\n",
      "18       云南      30021     4.4\n",
      "19       广西   27202.39     4.1\n",
      "20       山西   25698.18       5\n",
      "21      内蒙古      24627     7.3\n",
      "22       贵州   20913.25     4.9\n",
      "23       新疆   19125.91     6.8\n",
      "24       天津    16737.3     4.3\n",
      "25      黑龙江    15883.9     2.6\n",
      "26       吉林   13531.19     6.3\n",
      "27       甘肃    11863.8     6.4\n",
      "28       海南    7551.18     9.2\n",
      "29       宁夏    5314.95     6.6\n",
      "30       青海     3799.1     5.3\n",
      "31       西藏    2392.67     9.5\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "\n",
    "# Scrape the provincial GDP table from gotohui.com and save it to Excel.\n",
    "GDP_URL = 'http://gdp.gotohui.com/'\n",
    "GDP_COLUMNS = ['province', 'gdp', 'rise']\n",
    "\n",
    "# A timeout keeps the cell from hanging forever on a dead connection, and\n",
    "# raise_for_status surfaces HTTP errors instead of parsing an error page.\n",
    "response = requests.get(GDP_URL, timeout=10)\n",
    "response.raise_for_status()\n",
    "html_text = response.text\n",
    "soup = BeautifulSoup(html_text, 'html.parser')\n",
    "\n",
    "# Fail with a clear message if the expected page structure is missing,\n",
    "# rather than an AttributeError on None.\n",
    "container = soup.find('div', class_='col-xs-7 aside top0')\n",
    "table = container.find('table', class_='ntable') if container is not None else None\n",
    "if table is None:\n",
    "    raise ValueError(f'GDP table not found at {GDP_URL}; the page layout may have changed')\n",
    "rows = table.find_all('tr')\n",
    "\n",
    "records = []\n",
    "for tr in rows:\n",
    "    # Skip header rows by looking for an actual <th> tag; searching the row\n",
    "    # text for the letters 'th' does not detect a header.\n",
    "    if tr.find('th') is not None:\n",
    "        continue\n",
    "    cells = []\n",
    "    for td in tr.find_all('td'):\n",
    "        # Prefer the link text when the cell wraps its value in an <a> tag;\n",
    "        # checking for the tag (not the letter 'a') avoids calling\n",
    "        # .get_text() on None.\n",
    "        link = td.find('a')\n",
    "        source_tag = link if link is not None else td\n",
    "        cells.append(source_tag.get_text(strip=True))\n",
    "    # Keep only rows that have exactly one value per expected column.\n",
    "    if len(cells) == len(GDP_COLUMNS):\n",
    "        records.append(cells)\n",
    "\n",
    "# Build the frame once instead of appending row by row.\n",
    "df = pd.DataFrame(records, columns=GDP_COLUMNS)\n",
    "# Store numbers as numbers; unparseable text becomes NaN.\n",
    "for numeric_col in ('gdp', 'rise'):\n",
    "    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')\n",
    "# The page's header row is made of <td> cells, so it reaches this point;\n",
    "# its 'gdp' value is not numeric, which lets us drop it here.\n",
    "df = df.dropna(subset=['gdp']).reset_index(drop=True)\n",
    "\n",
    "# sheet_name is passed by keyword: positional use is deprecated in pandas 2.x.\n",
    "df.to_excel(\"D:\\\\2309030117\\\\gdp_data.xlsx\", sheet_name=\"Sheet1\")\n",
    "print(df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9a2d6389-12b0-4a39-b336-e81bb08a9cdd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   province income  rise\n",
      "0        上海  84034  1605\n",
      "1        北京  84023  2505\n",
      "2        浙江  71268  2781\n",
      "3        江苏  60178  2435\n",
      "4        广东  56905  2051\n",
      "5        福建  53817  2677\n",
      "6        天津  53003  1517\n",
      "7        山东  49050  1984\n",
      "8        湖南  47301  2435\n",
      "9       内蒙古  46295  1918\n",
      "10       重庆  45509  2007\n",
      "11       安徽  45133  2124\n",
      "12       辽宁  44003   952\n",
      "13       江西  43697  2013\n",
      "14       四川  43233  1789\n",
      "15       湖北  42626  2348\n",
      "16       陕西  42431  1718\n",
      "17       云南  42168  1263\n",
      "18       河北  41278  1487\n",
      "19       贵州  41086  1875\n",
      "20       海南  40118   -95\n",
      "21       广西  39703  1173\n",
      "22       山西  39532  2099\n",
      "23       青海  38736   991\n",
      "24       河南  38484  1389\n",
      "25       新疆  38410   768\n",
      "26       甘肃  37572  1385\n",
      "27       吉林  35471  -175\n",
      "28      黑龙江  35042  1396\n",
      "29       宁夏  40194      \n",
      "30       西藏  48753      \n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "\n",
    "# Scrape the per-province income table from gotohui.com and save it to Excel.\n",
    "INCOME_URL = 'https://www.gotohui.com/life/'\n",
    "INCOME_COLUMNS = ['province', 'income', 'rise']\n",
    "\n",
    "# A timeout keeps the cell from hanging forever on a dead connection, and\n",
    "# raise_for_status surfaces HTTP errors instead of parsing an error page.\n",
    "response = requests.get(INCOME_URL, timeout=10)\n",
    "response.raise_for_status()\n",
    "html_text = response.text\n",
    "soup = BeautifulSoup(html_text, 'html.parser')\n",
    "\n",
    "# Fail with a clear message if the expected page structure is missing,\n",
    "# rather than an AttributeError on None.\n",
    "container = soup.find('div', class_='col-xs-7 aside top0')\n",
    "table = (\n",
    "    container.find('table', class_='ntable table-striped table-hover')\n",
    "    if container is not None\n",
    "    else None\n",
    ")\n",
    "if table is None:\n",
    "    raise ValueError(f'Income table not found at {INCOME_URL}; the page layout may have changed')\n",
    "rows = table.find_all('tr')\n",
    "\n",
    "records = []\n",
    "for tr in rows:\n",
    "    # Skip header rows by looking for an actual <th> tag; searching the row\n",
    "    # text for the letters 'th' does not detect a header.\n",
    "    if tr.find('th') is not None:\n",
    "        continue\n",
    "    cells = []\n",
    "    for td in tr.find_all('td'):\n",
    "        # Prefer the link text when the cell wraps its value in an <a> tag;\n",
    "        # checking for the tag (not the letter 'a') avoids calling\n",
    "        # .get_text() on None.\n",
    "        link = td.find('a')\n",
    "        source_tag = link if link is not None else td\n",
    "        cells.append(source_tag.get_text(strip=True))\n",
    "    # Keep only rows that have exactly one value per expected column.\n",
    "    if len(cells) == len(INCOME_COLUMNS):\n",
    "        records.append(cells)\n",
    "\n",
    "# Build the frame once instead of appending row by row.\n",
    "df = pd.DataFrame(records, columns=INCOME_COLUMNS)\n",
    "# Store numbers as numbers; blank or unparseable cells become NaN.\n",
    "for numeric_col in ('income', 'rise'):\n",
    "    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')\n",
    "\n",
    "# sheet_name is passed by keyword: positional use is deprecated in pandas 2.x.\n",
    "df.to_excel(\"D:\\\\2309030117\\\\income_data.xlsx\", sheet_name=\"Sheet1\")\n",
    "print(df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3c9ebd9-cc9d-472a-82ae-3f9743ffb7d1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
